#encoding=utf-8
import jieba
import csv
import jieba.posseg as pseg
import jieba
# Set Python's interpreter-wide default string encoding to UTF-8.
import sys
# Python 2 only: `reload` is a builtin there. `sys` is reloaded first because
# the interpreter removes `setdefaultencoding` from the module at startup, so
# these two statements must stay in this order.
# NOTE(review): presumably needed so the words produced by jieba can be
# written to the plain text output files without explicit encoding -- confirm.
reload(sys)
sys.setdefaultencoding('utf8')
#------------------------

# Column positions read from each row of movieItem.csv.
# NOTE(review): the meaning of each column is inferred from the output file it
# feeds (china_movie / total_country / total_comment) -- confirm against the
# actual CSV schema.
MOVIE_COL = 0
COUNTRY_COL = 2
COMMENT_COL = 5

# Spellings that count as a Chinese production; both are written to
# total_country.txt under the single canonical name below.
CHINA_NAMES = ('中国大陆', '中国')
CHINA_CANONICAL = '中国'

# Pass 1: segment the comment column with jieba's part-of-speech tagger.
#   total_comment.txt   <- every segmented word longer than one character
#   total_sense_com.txt <- the subset of those words whose flag is 'v'
# Words are written space-prefixed on a single line.
# The `with` block guarantees all three files are closed even if a row fails.
with open('movieItem.csv') as movie_file, \
        open('total_comment.txt', 'w') as total_comment, \
        open('total_sense_com.txt', 'w') as total_sense_com:
    csv_reader = csv.reader(movie_file)
    for row in csv_reader:
        # Blank or truncated rows have no comment column; skip them instead
        # of failing with IndexError.
        if len(row) <= COMMENT_COL:
            continue
        for i in pseg.cut(row[COMMENT_COL]):
            if len(i.word) > 1:
                total_comment.write(" " + i.word)
                if i.flag == 'v':
                    total_sense_com.write(" " + i.word)

# Pass 2: collect production countries.
#   total_country.txt <- every country name, with '中国大陆' normalised to '中国'
#   china_movie.txt   <- column MOVIE_COL of every row that lists China
with open('movieItem.csv') as movie_file, \
        open("total_country.txt", 'w') as total_country, \
        open("china_movie.txt", "w") as china_movie:
    csv_reader = csv.reader(movie_file)
    for row in csv_reader:
        if len(row) <= COUNTRY_COL:
            continue
        country_field = row[COUNTRY_COL]
        if '/' in country_field:
            # Several countries separated by '/'; each part is used as-is.
            countries = country_field.split('/')
        elif len(country_field) <= 1 or country_field == 'NULL':
            # Empty / placeholder value: nothing to record for this row.
            continue
        else:
            countries = [country_field]
        for country in countries:
            if country in CHINA_NAMES:
                china_movie.write(' ' + row[MOVIE_COL])
                # Normalise in BOTH the single- and multi-country cases so
                # the same country is never recorded under two names.
                country = CHINA_CANONICAL
            total_country.write(' ' + country)


